BSF Biomineralization Project

Cell cleavage

Set up workspace

Load libraries

library("ggplot2")
library("tidyverse")

Import the data

raw_data <- read.csv("5-Developmental_progression_and_size/Input/cell_cleavage.csv" , header = TRUE, sep = ",")

Change sample_ID and Treatment to Factor variables

raw_data$sample_ID <- as.factor(raw_data$sample_ID)
str(raw_data$sample_ID)
##  Factor w/ 9 levels "144","145","146",..: 1 2 3 4 5 6 7 8 9
raw_data$treatment <- as.factor(raw_data$treatment)
str(raw_data$treatment)
##  Factor w/ 3 levels "AMB","l","xl": 1 1 2 3 3 2 1 2 3

Normalize the counts based on number of cells per sample

Calculate total cells per sample

total_counts_per_sample <- raw_data %>%
  mutate(total_counts_per_sample = One_cell+Two_cell+Four_cell+Greater_than_4_cell) %>% 
  arrange(treatment)

show(total_counts_per_sample)
##   sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1       144       AMB       72       31        32                  18
## 2       145       AMB       47       35        37                  92
## 3       150       AMB      220        5         8                   4
## 4       146         l       80       85        65                  18
## 5       149         l       43       37         9                   2
## 6       151         l      124       85        58                  15
## 7       147        xl       18       17        34                  13
## 8       148        xl       98       58        14                   2
## 9       152        xl       91       95        41                  10
##   total_counts_per_sample
## 1                     153
## 2                     211
## 3                     237
## 4                     248
## 5                      91
## 6                     282
## 7                      82
## 8                     172
## 9                     237

Calculate % at each stage

normalized_cleavage_counts <- total_counts_per_sample %>%
  mutate(percent_one_cell = (One_cell/total_counts_per_sample)*100) %>% 
  mutate(percent_two_cell = (Two_cell/total_counts_per_sample)*100) %>%
  mutate(percent_four_cell = (Four_cell/total_counts_per_sample)*100) %>%
  mutate(percent_greater_than_four_cell = (Greater_than_4_cell/total_counts_per_sample)*100)

show(normalized_cleavage_counts)
##   sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1       144       AMB       72       31        32                  18
## 2       145       AMB       47       35        37                  92
## 3       150       AMB      220        5         8                   4
## 4       146         l       80       85        65                  18
## 5       149         l       43       37         9                   2
## 6       151         l      124       85        58                  15
## 7       147        xl       18       17        34                  13
## 8       148        xl       98       58        14                   2
## 9       152        xl       91       95        41                  10
##   total_counts_per_sample percent_one_cell percent_two_cell percent_four_cell
## 1                     153         47.05882        20.261438         20.915033
## 2                     211         22.27488        16.587678         17.535545
## 3                     237         92.82700         2.109705          3.375527
## 4                     248         32.25806        34.274194         26.209677
## 5                      91         47.25275        40.659341          9.890110
## 6                     282         43.97163        30.141844         20.567376
## 7                      82         21.95122        20.731707         41.463415
## 8                     172         56.97674        33.720930          8.139535
## 9                     237         38.39662        40.084388         17.299578
##   percent_greater_than_four_cell
## 1                      11.764706
## 2                      43.601896
## 3                       1.687764
## 4                       7.258065
## 5                       2.197802
## 6                       5.319149
## 7                      15.853659
## 8                       1.162791
## 9                       4.219409
str(normalized_cleavage_counts)
## 'data.frame':    9 obs. of  11 variables:
##  $ sample_ID                     : Factor w/ 9 levels "144","145","146",..: 1 2 7 3 6 8 4 5 9
##  $ treatment                     : Factor w/ 3 levels "AMB","l","xl": 1 1 1 2 2 2 3 3 3
##  $ One_cell                      : int  72 47 220 80 43 124 18 98 91
##  $ Two_cell                      : int  31 35 5 85 37 85 17 58 95
##  $ Four_cell                     : int  32 37 8 65 9 58 34 14 41
##  $ Greater_than_4_cell           : int  18 92 4 18 2 15 13 2 10
##  $ total_counts_per_sample       : int  153 211 237 248 91 282 82 172 237
##  $ percent_one_cell              : num  47.1 22.3 92.8 32.3 47.3 ...
##  $ percent_two_cell              : num  20.26 16.59 2.11 34.27 40.66 ...
##  $ percent_four_cell             : num  20.92 17.54 3.38 26.21 9.89 ...
##  $ percent_greater_than_four_cell: num  11.76 43.6 1.69 7.26 2.2 ...
gathered_cleavage_counts <- normalized_cleavage_counts %>% 
  select(sample_ID, treatment, percent_one_cell:percent_greater_than_four_cell) %>% 
  gather(cleavage_stage, percent, -sample_ID, -treatment, convert = FALSE, na.rm = TRUE, factor_key = TRUE)

show(gathered_cleavage_counts)
##    sample_ID treatment                 cleavage_stage   percent
## 1        144       AMB               percent_one_cell 47.058824
## 2        145       AMB               percent_one_cell 22.274882
## 3        150       AMB               percent_one_cell 92.827004
## 4        146         l               percent_one_cell 32.258065
## 5        149         l               percent_one_cell 47.252747
## 6        151         l               percent_one_cell 43.971631
## 7        147        xl               percent_one_cell 21.951220
## 8        148        xl               percent_one_cell 56.976744
## 9        152        xl               percent_one_cell 38.396624
## 10       144       AMB               percent_two_cell 20.261438
## 11       145       AMB               percent_two_cell 16.587678
## 12       150       AMB               percent_two_cell  2.109705
## 13       146         l               percent_two_cell 34.274194
## 14       149         l               percent_two_cell 40.659341
## 15       151         l               percent_two_cell 30.141844
## 16       147        xl               percent_two_cell 20.731707
## 17       148        xl               percent_two_cell 33.720930
## 18       152        xl               percent_two_cell 40.084388
## 19       144       AMB              percent_four_cell 20.915033
## 20       145       AMB              percent_four_cell 17.535545
## 21       150       AMB              percent_four_cell  3.375527
## 22       146         l              percent_four_cell 26.209677
## 23       149         l              percent_four_cell  9.890110
## 24       151         l              percent_four_cell 20.567376
## 25       147        xl              percent_four_cell 41.463415
## 26       148        xl              percent_four_cell  8.139535
## 27       152        xl              percent_four_cell 17.299578
## 28       144       AMB percent_greater_than_four_cell 11.764706
## 29       145       AMB percent_greater_than_four_cell 43.601896
## 30       150       AMB percent_greater_than_four_cell  1.687764
## 31       146         l percent_greater_than_four_cell  7.258065
## 32       149         l percent_greater_than_four_cell  2.197802
## 33       151         l percent_greater_than_four_cell  5.319149
## 34       147        xl percent_greater_than_four_cell 15.853659
## 35       148        xl percent_greater_than_four_cell  1.162791
## 36       152        xl percent_greater_than_four_cell  4.219409

2-Way ANOVA with Cleavage Nested within Treatment

cell_cleavage_anova <- aov(percent ~ cleavage_stage/treatment, data = gathered_cleavage_counts)
summary(cell_cleavage_anova)
##                          Df Sum Sq Mean Sq F value   Pr(>F)    
## cleavage_stage            3   5868  1956.2   8.194 0.000627 ***
## cleavage_stage:treatment  8   1687   210.9   0.883 0.544520    
## Residuals                24   5729   238.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(cell_cleavage_anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = percent ~ cleavage_stage/treatment, data = gathered_cleavage_counts)
## 
## $cleavage_stage
##                                                        diff       lwr
## percent_two_cell-percent_one_cell                -18.266280 -38.35874
## percent_four_cell-percent_one_cell               -26.396883 -46.48934
## percent_greater_than_four_cell-percent_one_cell  -34.433611 -54.52607
## percent_four_cell-percent_two_cell                -8.130603 -28.22306
## percent_greater_than_four_cell-percent_two_cell  -16.167332 -36.25979
## percent_greater_than_four_cell-percent_four_cell  -8.036728 -28.12919
##                                                         upr     p adj
## percent_two_cell-percent_one_cell                  1.826181 0.0840869
## percent_four_cell-percent_one_cell                -6.304422 0.0069159
## percent_greater_than_four_cell-percent_one_cell  -14.341150 0.0004532
## percent_four_cell-percent_two_cell                11.961858 0.6832423
## percent_greater_than_four_cell-percent_two_cell    3.925129 0.1465336
## percent_greater_than_four_cell-percent_four_cell  12.055732 0.6909127
## 
## $`cleavage_stage:treatment`
##                                                                             diff
## percent_two_cell:AMB-percent_one_cell:AMB                            -41.0672963
## percent_four_cell:AMB-percent_one_cell:AMB                           -40.1115347
## percent_greater_than_four_cell:AMB-percent_one_cell:AMB              -35.0354480
## percent_one_cell:l-percent_one_cell:AMB                              -12.8927554
## percent_two_cell:l-percent_one_cell:AMB                              -19.0284437
## percent_four_cell:l-percent_one_cell:AMB                             -35.1645154
## percent_greater_than_four_cell:l-percent_one_cell:AMB                -49.1285645
## percent_one_cell:xl-percent_one_cell:AMB                             -14.9453737
## percent_two_cell:xl-percent_one_cell:AMB                             -22.5412278
## percent_four_cell:xl-percent_one_cell:AMB                            -31.7527272
## percent_greater_than_four_cell:xl-percent_one_cell:AMB               -46.9749502
## percent_four_cell:AMB-percent_two_cell:AMB                             0.9557616
## percent_greater_than_four_cell:AMB-percent_two_cell:AMB                6.0318484
## percent_one_cell:l-percent_two_cell:AMB                               28.1745409
## percent_two_cell:l-percent_two_cell:AMB                               22.0388526
## percent_four_cell:l-percent_two_cell:AMB                               5.9027810
## percent_greater_than_four_cell:l-percent_two_cell:AMB                 -8.0612682
## percent_one_cell:xl-percent_two_cell:AMB                              26.1219226
## percent_two_cell:xl-percent_two_cell:AMB                              18.5260685
## percent_four_cell:xl-percent_two_cell:AMB                              9.3145691
## percent_greater_than_four_cell:xl-percent_two_cell:AMB                -5.9076539
## percent_greater_than_four_cell:AMB-percent_four_cell:AMB               5.0760867
## percent_one_cell:l-percent_four_cell:AMB                              27.2187793
## percent_two_cell:l-percent_four_cell:AMB                              21.0830910
## percent_four_cell:l-percent_four_cell:AMB                              4.9470194
## percent_greater_than_four_cell:l-percent_four_cell:AMB                -9.0170298
## percent_one_cell:xl-percent_four_cell:AMB                             25.1661610
## percent_two_cell:xl-percent_four_cell:AMB                             17.5703069
## percent_four_cell:xl-percent_four_cell:AMB                             8.3588075
## percent_greater_than_four_cell:xl-percent_four_cell:AMB               -6.8634155
## percent_one_cell:l-percent_greater_than_four_cell:AMB                 22.1426925
## percent_two_cell:l-percent_greater_than_four_cell:AMB                 16.0070043
## percent_four_cell:l-percent_greater_than_four_cell:AMB                -0.1290674
## percent_greater_than_four_cell:l-percent_greater_than_four_cell:AMB  -14.0931166
## percent_one_cell:xl-percent_greater_than_four_cell:AMB                20.0900743
## percent_two_cell:xl-percent_greater_than_four_cell:AMB                12.4942201
## percent_four_cell:xl-percent_greater_than_four_cell:AMB                3.2827207
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:AMB -11.9395023
## percent_two_cell:l-percent_one_cell:l                                 -6.1356883
## percent_four_cell:l-percent_one_cell:l                               -22.2717599
## percent_greater_than_four_cell:l-percent_one_cell:l                  -36.2358091
## percent_one_cell:xl-percent_one_cell:l                                -2.0526183
## percent_two_cell:xl-percent_one_cell:l                                -9.6484724
## percent_four_cell:xl-percent_one_cell:l                              -18.8599718
## percent_greater_than_four_cell:xl-percent_one_cell:l                 -34.0821948
## percent_four_cell:l-percent_two_cell:l                               -16.1360717
## percent_greater_than_four_cell:l-percent_two_cell:l                  -30.1001208
## percent_one_cell:xl-percent_two_cell:l                                 4.0830700
## percent_two_cell:xl-percent_two_cell:l                                -3.5127841
## percent_four_cell:xl-percent_two_cell:l                              -12.7242835
## percent_greater_than_four_cell:xl-percent_two_cell:l                 -27.9465066
## percent_greater_than_four_cell:l-percent_four_cell:l                 -13.9640492
## percent_one_cell:xl-percent_four_cell:l                               20.2191417
## percent_two_cell:xl-percent_four_cell:l                               12.6232875
## percent_four_cell:xl-percent_four_cell:l                               3.4117881
## percent_greater_than_four_cell:xl-percent_four_cell:l                -11.8104349
## percent_one_cell:xl-percent_greater_than_four_cell:l                  34.1831908
## percent_two_cell:xl-percent_greater_than_four_cell:l                  26.5873367
## percent_four_cell:xl-percent_greater_than_four_cell:l                 17.3758373
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:l     2.1536143
## percent_two_cell:xl-percent_one_cell:xl                               -7.5958541
## percent_four_cell:xl-percent_one_cell:xl                             -16.8073535
## percent_greater_than_four_cell:xl-percent_one_cell:xl                -32.0295766
## percent_four_cell:xl-percent_two_cell:xl                              -9.2114994
## percent_greater_than_four_cell:xl-percent_two_cell:xl                -24.4337224
## percent_greater_than_four_cell:xl-percent_four_cell:xl               -15.2222230
##                                                                            lwr
## percent_two_cell:AMB-percent_one_cell:AMB                            -86.55403
## percent_four_cell:AMB-percent_one_cell:AMB                           -85.59826
## percent_greater_than_four_cell:AMB-percent_one_cell:AMB              -80.52218
## percent_one_cell:l-percent_one_cell:AMB                              -58.37948
## percent_two_cell:l-percent_one_cell:AMB                              -64.51517
## percent_four_cell:l-percent_one_cell:AMB                             -80.65124
## percent_greater_than_four_cell:l-percent_one_cell:AMB                -94.61529
## percent_one_cell:xl-percent_one_cell:AMB                             -60.43210
## percent_two_cell:xl-percent_one_cell:AMB                             -68.02796
## percent_four_cell:xl-percent_one_cell:AMB                            -77.23946
## percent_greater_than_four_cell:xl-percent_one_cell:AMB               -92.46168
## percent_four_cell:AMB-percent_two_cell:AMB                           -44.53097
## percent_greater_than_four_cell:AMB-percent_two_cell:AMB              -39.45488
## percent_one_cell:l-percent_two_cell:AMB                              -17.31219
## percent_two_cell:l-percent_two_cell:AMB                              -23.44788
## percent_four_cell:l-percent_two_cell:AMB                             -39.58395
## percent_greater_than_four_cell:l-percent_two_cell:AMB                -53.54800
## percent_one_cell:xl-percent_two_cell:AMB                             -19.36481
## percent_two_cell:xl-percent_two_cell:AMB                             -26.96066
## percent_four_cell:xl-percent_two_cell:AMB                            -36.17216
## percent_greater_than_four_cell:xl-percent_two_cell:AMB               -51.39438
## percent_greater_than_four_cell:AMB-percent_four_cell:AMB             -40.41064
## percent_one_cell:l-percent_four_cell:AMB                             -18.26795
## percent_two_cell:l-percent_four_cell:AMB                             -24.40364
## percent_four_cell:l-percent_four_cell:AMB                            -40.53971
## percent_greater_than_four_cell:l-percent_four_cell:AMB               -54.50376
## percent_one_cell:xl-percent_four_cell:AMB                            -20.32057
## percent_two_cell:xl-percent_four_cell:AMB                            -27.91642
## percent_four_cell:xl-percent_four_cell:AMB                           -37.12792
## percent_greater_than_four_cell:xl-percent_four_cell:AMB              -52.35014
## percent_one_cell:l-percent_greater_than_four_cell:AMB                -23.34404
## percent_two_cell:l-percent_greater_than_four_cell:AMB                -29.47972
## percent_four_cell:l-percent_greater_than_four_cell:AMB               -45.61580
## percent_greater_than_four_cell:l-percent_greater_than_four_cell:AMB  -59.57985
## percent_one_cell:xl-percent_greater_than_four_cell:AMB               -25.39665
## percent_two_cell:xl-percent_greater_than_four_cell:AMB               -32.99251
## percent_four_cell:xl-percent_greater_than_four_cell:AMB              -42.20401
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:AMB -57.42623
## percent_two_cell:l-percent_one_cell:l                                -51.62242
## percent_four_cell:l-percent_one_cell:l                               -67.75849
## percent_greater_than_four_cell:l-percent_one_cell:l                  -81.72254
## percent_one_cell:xl-percent_one_cell:l                               -47.53935
## percent_two_cell:xl-percent_one_cell:l                               -55.13520
## percent_four_cell:xl-percent_one_cell:l                              -64.34670
## percent_greater_than_four_cell:xl-percent_one_cell:l                 -79.56892
## percent_four_cell:l-percent_two_cell:l                               -61.62280
## percent_greater_than_four_cell:l-percent_two_cell:l                  -75.58685
## percent_one_cell:xl-percent_two_cell:l                               -41.40366
## percent_two_cell:xl-percent_two_cell:l                               -48.99951
## percent_four_cell:xl-percent_two_cell:l                              -58.21101
## percent_greater_than_four_cell:xl-percent_two_cell:l                 -73.43324
## percent_greater_than_four_cell:l-percent_four_cell:l                 -59.45078
## percent_one_cell:xl-percent_four_cell:l                              -25.26759
## percent_two_cell:xl-percent_four_cell:l                              -32.86344
## percent_four_cell:xl-percent_four_cell:l                             -42.07494
## percent_greater_than_four_cell:xl-percent_four_cell:l                -57.29716
## percent_one_cell:xl-percent_greater_than_four_cell:l                 -11.30354
## percent_two_cell:xl-percent_greater_than_four_cell:l                 -18.89939
## percent_four_cell:xl-percent_greater_than_four_cell:l                -28.11089
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:l   -43.33311
## percent_two_cell:xl-percent_one_cell:xl                              -53.08258
## percent_four_cell:xl-percent_one_cell:xl                             -62.29408
## percent_greater_than_four_cell:xl-percent_one_cell:xl                -77.51631
## percent_four_cell:xl-percent_two_cell:xl                             -54.69823
## percent_greater_than_four_cell:xl-percent_two_cell:xl                -69.92045
## percent_greater_than_four_cell:xl-percent_four_cell:xl               -60.70895
##                                                                            upr
## percent_two_cell:AMB-percent_one_cell:AMB                             4.419433
## percent_four_cell:AMB-percent_one_cell:AMB                            5.375194
## percent_greater_than_four_cell:AMB-percent_one_cell:AMB              10.451281
## percent_one_cell:l-percent_one_cell:AMB                              32.593974
## percent_two_cell:l-percent_one_cell:AMB                              26.458285
## percent_four_cell:l-percent_one_cell:AMB                             10.322214
## percent_greater_than_four_cell:l-percent_one_cell:AMB                -3.641835
## percent_one_cell:xl-percent_one_cell:AMB                             30.541355
## percent_two_cell:xl-percent_one_cell:AMB                             22.945501
## percent_four_cell:xl-percent_one_cell:AMB                            13.734002
## percent_greater_than_four_cell:xl-percent_one_cell:AMB               -1.488221
## percent_four_cell:AMB-percent_two_cell:AMB                           46.442491
## percent_greater_than_four_cell:AMB-percent_two_cell:AMB              51.518578
## percent_one_cell:l-percent_two_cell:AMB                              73.661270
## percent_two_cell:l-percent_two_cell:AMB                              67.525582
## percent_four_cell:l-percent_two_cell:AMB                             51.389510
## percent_greater_than_four_cell:l-percent_two_cell:AMB                37.425461
## percent_one_cell:xl-percent_two_cell:AMB                             71.608652
## percent_two_cell:xl-percent_two_cell:AMB                             64.012798
## percent_four_cell:xl-percent_two_cell:AMB                            54.801298
## percent_greater_than_four_cell:xl-percent_two_cell:AMB               39.579075
## percent_greater_than_four_cell:AMB-percent_four_cell:AMB             50.562816
## percent_one_cell:l-percent_four_cell:AMB                             72.705508
## percent_two_cell:l-percent_four_cell:AMB                             66.569820
## percent_four_cell:l-percent_four_cell:AMB                            50.433749
## percent_greater_than_four_cell:l-percent_four_cell:AMB               36.469699
## percent_one_cell:xl-percent_four_cell:AMB                            70.652890
## percent_two_cell:xl-percent_four_cell:AMB                            63.057036
## percent_four_cell:xl-percent_four_cell:AMB                           53.845537
## percent_greater_than_four_cell:xl-percent_four_cell:AMB              38.623314
## percent_one_cell:l-percent_greater_than_four_cell:AMB                67.629422
## percent_two_cell:l-percent_greater_than_four_cell:AMB                61.493733
## percent_four_cell:l-percent_greater_than_four_cell:AMB               45.357662
## percent_greater_than_four_cell:l-percent_greater_than_four_cell:AMB  31.393613
## percent_one_cell:xl-percent_greater_than_four_cell:AMB               65.576803
## percent_two_cell:xl-percent_greater_than_four_cell:AMB               57.980949
## percent_four_cell:xl-percent_greater_than_four_cell:AMB              48.769450
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:AMB 33.547227
## percent_two_cell:l-percent_one_cell:l                                39.351041
## percent_four_cell:l-percent_one_cell:l                               23.214969
## percent_greater_than_four_cell:l-percent_one_cell:l                   9.250920
## percent_one_cell:xl-percent_one_cell:l                               43.434111
## percent_two_cell:xl-percent_one_cell:l                               35.838257
## percent_four_cell:xl-percent_one_cell:l                              26.626757
## percent_greater_than_four_cell:xl-percent_one_cell:l                 11.404534
## percent_four_cell:l-percent_two_cell:l                               29.350658
## percent_greater_than_four_cell:l-percent_two_cell:l                  15.386608
## percent_one_cell:xl-percent_two_cell:l                               49.569799
## percent_two_cell:xl-percent_two_cell:l                               41.973945
## percent_four_cell:xl-percent_two_cell:l                              32.762446
## percent_greater_than_four_cell:xl-percent_two_cell:l                 17.540223
## percent_greater_than_four_cell:l-percent_four_cell:l                 31.522680
## percent_one_cell:xl-percent_four_cell:l                              65.705871
## percent_two_cell:xl-percent_four_cell:l                              58.110017
## percent_four_cell:xl-percent_four_cell:l                             48.898517
## percent_greater_than_four_cell:xl-percent_four_cell:l                33.676294
## percent_one_cell:xl-percent_greater_than_four_cell:l                 79.669920
## percent_two_cell:xl-percent_greater_than_four_cell:l                 72.074066
## percent_four_cell:xl-percent_greater_than_four_cell:l                62.862566
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:l   47.640343
## percent_two_cell:xl-percent_one_cell:xl                              37.890875
## percent_four_cell:xl-percent_one_cell:xl                             28.679376
## percent_greater_than_four_cell:xl-percent_one_cell:xl                13.457153
## percent_four_cell:xl-percent_two_cell:xl                             36.275230
## percent_greater_than_four_cell:xl-percent_two_cell:xl                21.053007
## percent_greater_than_four_cell:xl-percent_four_cell:xl               30.264506
##                                                                          p adj
## percent_two_cell:AMB-percent_one_cell:AMB                            0.1035374
## percent_four_cell:AMB-percent_one_cell:AMB                           0.1202209
## percent_greater_than_four_cell:AMB-percent_one_cell:AMB              0.2500672
## percent_one_cell:l-percent_one_cell:AMB                              0.9954403
## percent_two_cell:l-percent_one_cell:AMB                              0.9239910
## percent_four_cell:l-percent_one_cell:AMB                             0.2458087
## percent_greater_than_four_cell:l-percent_one_cell:AMB                0.0264605
## percent_one_cell:xl-percent_one_cell:AMB                             0.9853196
## percent_two_cell:xl-percent_one_cell:AMB                             0.8097694
## percent_four_cell:xl-percent_one_cell:AMB                            0.3756531
## percent_greater_than_four_cell:xl-percent_one_cell:AMB               0.0386812
## percent_four_cell:AMB-percent_two_cell:AMB                           1.0000000
## percent_greater_than_four_cell:AMB-percent_two_cell:AMB              0.9999967
## percent_one_cell:l-percent_two_cell:AMB                              0.5435450
## percent_two_cell:l-percent_two_cell:AMB                              0.8295493
## percent_four_cell:l-percent_two_cell:AMB                             0.9999973
## percent_greater_than_four_cell:l-percent_two_cell:AMB                0.9999376
## percent_one_cell:xl-percent_two_cell:AMB                             0.6457900
## percent_two_cell:xl-percent_two_cell:AMB                             0.9354673
## percent_four_cell:xl-percent_two_cell:AMB                            0.9997485
## percent_greater_than_four_cell:xl-percent_two_cell:AMB               0.9999973
## percent_greater_than_four_cell:AMB-percent_four_cell:AMB             0.9999994
## percent_one_cell:l-percent_four_cell:AMB                             0.5911913
## percent_two_cell:l-percent_four_cell:AMB                             0.8641831
## percent_four_cell:l-percent_four_cell:AMB                            0.9999996
## percent_greater_than_four_cell:l-percent_four_cell:AMB               0.9998153
## percent_one_cell:xl-percent_four_cell:AMB                            0.6924475
## percent_two_cell:xl-percent_four_cell:AMB                            0.9539883
## percent_four_cell:xl-percent_four_cell:AMB                           0.9999110
## percent_greater_than_four_cell:xl-percent_four_cell:AMB              0.9999875
## percent_one_cell:l-percent_greater_than_four_cell:AMB                0.8255459
## percent_two_cell:l-percent_greater_than_four_cell:AMB                0.9757088
## percent_four_cell:l-percent_greater_than_four_cell:AMB               1.0000000
## percent_greater_than_four_cell:l-percent_greater_than_four_cell:AMB  0.9906533
## percent_one_cell:xl-percent_greater_than_four_cell:AMB               0.8956646
## percent_two_cell:xl-percent_greater_than_four_cell:AMB               0.9964914
## percent_four_cell:xl-percent_greater_than_four_cell:AMB              1.0000000
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:AMB 0.9976162
## percent_two_cell:l-percent_one_cell:l                                0.9999960
## percent_four_cell:l-percent_one_cell:l                               0.8205073
## percent_greater_than_four_cell:l-percent_one_cell:l                  0.2124773
## percent_one_cell:xl-percent_one_cell:l                               1.0000000
## percent_two_cell:xl-percent_one_cell:l                               0.9996497
## percent_four_cell:xl-percent_one_cell:l                              0.9279763
## percent_greater_than_four_cell:xl-percent_one_cell:l                 0.2831433
## percent_four_cell:l-percent_two_cell:l                               0.9742801
## percent_greater_than_four_cell:l-percent_two_cell:l                  0.4501870
## percent_one_cell:xl-percent_two_cell:l                               0.9999999
## percent_two_cell:xl-percent_two_cell:l                               1.0000000
## percent_four_cell:xl-percent_two_cell:l                              0.9959121
## percent_greater_than_four_cell:xl-percent_two_cell:l                 0.5548776
## percent_greater_than_four_cell:l-percent_four_cell:l                 0.9913059
## percent_one_cell:xl-percent_four_cell:l                              0.8918425
## percent_two_cell:xl-percent_four_cell:l                              0.9961753
## percent_four_cell:xl-percent_four_cell:l                             1.0000000
## percent_greater_than_four_cell:xl-percent_four_cell:l                0.9978296
## percent_one_cell:xl-percent_greater_than_four_cell:l                 0.2795038
## percent_two_cell:xl-percent_greater_than_four_cell:l                 0.6226991
## percent_four_cell:xl-percent_greater_than_four_cell:l                0.9572457
## percent_greater_than_four_cell:xl-percent_greater_than_four_cell:l   1.0000000
## percent_two_cell:xl-percent_one_cell:xl                              0.9999653
## percent_four_cell:xl-percent_one_cell:xl                             0.9658313
## percent_greater_than_four_cell:xl-percent_one_cell:xl                0.3638355
## percent_four_cell:xl-percent_two_cell:xl                             0.9997737
## percent_greater_than_four_cell:xl-percent_two_cell:xl                0.7271145
## percent_greater_than_four_cell:xl-percent_four_cell:xl               0.9831588

Calculate stats for cleavage stage and treatment categories

##Just treatment
#Mean for all treatments should be 25 because 100/4=25. Standard dev may vary.
clvgstats <- select(gathered_cleavage_counts, treatment:percent)
clvgstats.amb <- clvgstats %>% filter(treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.l <- gathered_cleavage_counts %>% filter(treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.xl <- gathered_cleavage_counts %>% filter(treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent)) 

treatment.clvgstats.rownames <- as.factor(c("AMB", "l", "xl"))
treatment.clvgstats <- as.data.frame(bind_rows(clvgstats.amb, clvgstats.l, clvgstats.xl, .id=NULL))
treatment.clvgstats$cleavage_stage <- treatment.clvgstats.rownames
treatment.clvgstats <- treatment.clvgstats[c(5,1,2,3,4)]
show(treatment.clvgstats)
##   cleavage_stage mean       sd      min      max
## 1            AMB   25 25.79005 1.687764 92.82700
## 2              l   25 15.77375 2.197802 47.25275
## 3             xl   25 17.13955 1.162791 56.97674
##Just cleavage
#Cleavage stage means should differ
clvgstats.onecell <- clvgstats %>% filter(cleavage_stage=="percent_one_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell <- clvgstats %>% filter(cleavage_stage=="percent_two_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell <- clvgstats %>% filter(cleavage_stage=="percent_four_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))

stage.clvgstats.rownames <- as.factor(c("One Cell", "Two Cells", "Four Cells", "> Four Cells"))
stage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell, clvgstats.twocell, clvgstats.fourcell, clvgstats.gfourcell, .id=NULL))
stage.clvgstats$cleavage_stage <- stage.clvgstats.rownames
stage.clvgstats <- stage.clvgstats[c(5,1,2,3,4)]
show(stage.clvgstats)
##   cleavage_stage     mean       sd       min      max
## 1       One Cell 44.77419 21.48958 21.951220 92.82700
## 2      Two Cells 26.50791 12.63031  2.109705 40.65934
## 3     Four Cells 18.37731 11.23231  3.375527 41.46341
## 4   > Four Cells 10.34058 13.39930  1.162791 43.60190
##Treatment within cleavage
clvgstats.onecell.amb <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.l <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.xl <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.amb <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.l <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.xl <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))

treatmentstage.clvgstats.rownames <- as.factor(c("OneCell_AMB", "OneCell_l", "OneCell_xl", "TwoCell_AMB", "TwoCell_l", "TwoCell_xl", "FourCell_AMB", "FourCell_l", "FourCell_xl", ">FourCell_AMB", ">FourCell_l", ">FourCell_xl"))
treatmentstage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell.amb, clvgstats.onecell.l, clvgstats.onecell.xl, clvgstats.twocell.amb, clvgstats.twocell.l, clvgstats.twocell.xl, clvgstats.fourcell.amb, clvgstats.fourcell.l, clvgstats.fourcell.xl, clvgstats.gfourcell.amb, clvgstats.gfourcell.l, clvgstats.gfourcell.xl, .id=NULL))
treatmentstage.clvgstats$cleavage_stage <- treatmentstage.clvgstats.rownames
treatmentstage.clvgstats <- treatmentstage.clvgstats[c(5,1,2,3,4)]
show(treatmentstage.clvgstats)
##    cleavage_stage      mean        sd       min       max
## 1     OneCell_AMB 54.053570 35.792392 22.274882 92.827004
## 2       OneCell_l 41.160814  7.882617 32.258065 47.252747
## 3      OneCell_xl 39.108196 17.523601 21.951220 56.976744
## 4     TwoCell_AMB 12.986273  9.596819  2.109705 20.261438
## 5       TwoCell_l 35.025126  5.298807 30.141844 40.659341
## 6      TwoCell_xl 31.512342  9.863567 20.731707 40.084388
## 7    FourCell_AMB 13.942035  9.305565  3.375527 20.915033
## 8      FourCell_l 18.889054  8.288223  9.890110 26.209677
## 9     FourCell_xl 22.300843 17.215683  8.139535 41.463415
## 10  >FourCell_AMB 19.018122 21.878246  1.687764 43.601896
## 11    >FourCell_l  4.925005  2.553052  2.197802  7.258065
## 12   >FourCell_xl  7.078620  7.751562  1.162791 15.853659

Plot mean_percent_cleavage

Calculate means at each stage

mpc <- normalized_cleavage_counts %>%
  group_by(treatment) %>% 
  summarize("One Cell" = mean(percent_one_cell),
            "Two Cells" = mean(percent_two_cell),
            "Four Cells" = mean(percent_four_cell),
            "> Four Cells" = mean(percent_greater_than_four_cell)) %>% 
  gather(cleavage_stage, mean_percent_cleavage, -treatment, na.rm = TRUE, factor_key = TRUE)
show(mpc)
## # A tibble: 12 x 3
##    treatment cleavage_stage mean_percent_cleavage
##    <fct>     <fct>                          <dbl>
##  1 AMB       One Cell                       54.1 
##  2 l         One Cell                       41.2 
##  3 xl        One Cell                       39.1 
##  4 AMB       Two Cells                      13.0 
##  5 l         Two Cells                      35.0 
##  6 xl        Two Cells                      31.5 
##  7 AMB       Four Cells                     13.9 
##  8 l         Four Cells                     18.9 
##  9 xl        Four Cells                     22.3 
## 10 AMB       > Four Cells                   19.0 
## 11 l         > Four Cells                    4.93
## 12 xl        > Four Cells                    7.08

Plot mean_percent_cleavage

final_cell_cleavage <- ggplot(mpc, aes(fill=cleavage_stage, y = mean_percent_cleavage, x=treatment)) + 
  geom_bar(position ="stack", stat = "identity") +
  xlab("pH Treatment") + ylab("Percent per treatment") + 
  scale_fill_brewer(name = "Number of Cells", labels = c("One", "Two", "Four", "> Four"), palette = "Paired") + #colour modification
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(a)") + #add a main title
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = c(0.5, 1), #Place legend top middle of figure
        legend.direction = "horizontal", #Make legend list items in a row versus columns
        legend.background = element_rect(fill = NA), #Legend block no-fill
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 8), legend.key.size = unit(0.5, "cm"),
  legend.key.width = unit(0.5,"cm"))
final_cell_cleavage

Growth through development

Set up workspace

Load libraries

library("tidyverse")
library("dplyr")
library("ggplot2")
library("gridExtra")

Import the data

raw_data <- read_csv("5-Developmental_progression_and_size/Input/size_metric_timeseries_data.csv")

raw_data$sample_ID <- as.factor(raw_data$sample_ID)
raw_data$time_point <- as.factor(raw_data$time_point)
raw_data$treatment <- as.factor(raw_data$treatment)
raw_data$TANK_NUM <- as.factor(raw_data$TANK_NUM)
raw_data$metric_id <- as.factor(raw_data$metric_id)

Check the type of each variable

str(raw_data)
## tibble [850 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ sample_ID : Factor w/ 34 levels "124","125","135",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ time_point: Factor w/ 7 levels "20190409_201",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ treatment : Factor w/ 4 levels "AMB","Gastrulation",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ TANK_NUM  : Factor w/ 15 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ metric_id : Factor w/ 850 levels "124_egg_1","124_egg_10",..: 1 12 19 20 21 22 23 24 25 2 ...
##  $ width1    : num [1:850] 0.521 0.49 0.559 0.491 0.537 0.538 0.607 0.581 0.471 0.444 ...
##  $ width2    : num [1:850] 0.404 0.424 0.361 0.337 0.44 0.402 0.511 0.469 0.545 0.474 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   sample_ID = col_double(),
##   ..   time_point = col_character(),
##   ..   treatment = col_character(),
##   ..   TANK_NUM = col_double(),
##   ..   metric_id = col_character(),
##   ..   width1 = col_double(),
##   ..   width2 = col_double()
##   .. )

Examine volume at each life stage

Unfertilized Eggs (No treatment)

Calculate egg volume
egg_volume <- filter(raw_data, time_point == "Eggs") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(egg_volume)
## # A tibble: 6 x 8
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 124       Eggs       <NA>      <NA>     124_egg_1  0.521  0.404 0.0445
## 2 124       Eggs       <NA>      <NA>     124_egg_2  0.49   0.424 0.0461
## 3 124       Eggs       <NA>      <NA>     124_egg_3  0.559  0.361 0.0381
## 4 124       Eggs       <NA>      <NA>     124_egg_4  0.491  0.337 0.0292
## 5 124       Eggs       <NA>      <NA>     124_egg_5  0.537  0.44  0.0544
## 6 124       Eggs       <NA>      <NA>     124_egg_6  0.538  0.402 0.0455
Examine the data

Examine distribution of egg volume data

egg_volume_boxplot <- ggplot(egg_volume, aes(y = volume, x=treatment)) +
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + # Set x and y axis labels
  #expand_limits(y=c(0, 1.0)) +
  geom_jitter(colour = "cadetblue3", alpha = 0.5) +
  geom_boxplot(colour="cadetblue3", alpha=0) + 
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                      axis.title.x = element_blank(), # No axis title
                      axis.text.x = element_text(color = "White"), #White-out the x-axis label
                      axis.ticks.x = element_blank(), # No ticks on x-axis
                      plot.background=element_blank()) + #Set the plot background
  ggtitle("(b)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "bottom") #set title attributes
egg_volume_boxplot

#QQ-Plot
qqnorm(egg_volume$volume)
qqline(egg_volume$volume)

Examine egg volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
egg_volume_res <- egg_volume #Copy dataset to manipulate for residual regression
egg_volume_fit <- lm(volume ~ sample_ID, data = egg_volume_res)  # Fit the model
egg_volume_res$predicted <- predict(egg_volume_fit)   # Save the predicted values
egg_volume_res$residuals <- residuals(egg_volume_fit) # Save the residual values
head(select(egg_volume_res, volume, predicted, residuals))
## # A tibble: 6 x 3
##   volume predicted residuals
##    <dbl>     <dbl>     <dbl>
## 1 0.0445    0.0495  -0.00494
## 2 0.0461    0.0495  -0.00334
## 3 0.0381    0.0495  -0.0113 
## 4 0.0292    0.0495  -0.0203 
## 5 0.0544    0.0495   0.00497
## 6 0.0455    0.0495  -0.00394
#Plot predicted (red) versus residuals (black)
egg_volume_res_scatter <- ggplot(egg_volume_res, aes(x = sample_ID, y = volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values
  ylab("Volume mm2")+ xlab("Sample ID") + #Label axes
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
egg_volume_res_scatter

#Histogram of residuals
egg_volume_res_hist <- egg_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
egg_volume_res_hist

Examine summary statistics for untreated eggs

untreatedegg <- select(egg_volume, time_point, volume)
untreatedegg.stats <- untreatedegg %>% 
  summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
notreatment <- as.factor(c("NA"))
untreatedegg.stats$treatment <- notreatment

untreatedegg.stats.rownames <- as.factor(c("Untreated_Eggs"))
untreatedegg.stats$timepoint <-untreatedegg.stats.rownames
untreatedegg.stats <- untreatedegg.stats[c(6,5,1,2,3,4)]
show(untreatedegg.stats)
## # A tibble: 1 x 6
##   timepoint      treatment   mean     sd     min    max
##   <fct>          <fct>      <dbl>  <dbl>   <dbl>  <dbl>
## 1 Untreated_Eggs NA        0.0396 0.0230 0.00150 0.0830
Final plot
final_egg_volume <- egg_volume_boxplot
final_egg_volume

Treated Eggs

Calculate treated eggs volume.
#Use the equation for volume of ellipse to calculate fertilized egg volume.
fert_volume <- filter(raw_data, time_point == "Fertilized_embryos") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(fert_volume)
## # A tibble: 6 x 8
##   sample_ID time_point    treatment TANK_NUM metric_id      width1 width2 volume
##   <fct>     <fct>         <fct>     <fct>    <fct>           <dbl>  <dbl>  <dbl>
## 1 135       Fertilized_e… AMB       1        135_fertilzed…  0.456  0.464 0.0514
## 2 135       Fertilized_e… AMB       1        135_fertilzed…  0.424  0.436 0.0422
## 3 135       Fertilized_e… AMB       1        135_fertilzed…  0.435  0.466 0.0495
## 4 135       Fertilized_e… AMB       1        135_fertilzed…  0.42   0.464 0.0473
## 5 135       Fertilized_e… AMB       1        135_fertilzed…  0.449  0.473 0.0526
## 6 135       Fertilized_e… AMB       1        135_fertilzed…  0.465  0.469 0.0536
Examine the data

Examine treated eggs volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
fert_volume_t_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = treatment)) + 
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(c)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "none") #set title attributes
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
fert_volume_t_boxplot

#Boxplot colored by TANK_NUM
fert_volume_s_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
fert_volume_s_boxplot

#QQ-Plot
qqnorm(fert_volume$volume)
qqline(fert_volume$volume)

Examine treated eggs volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
fert_volume_res <- fert_volume #Copy dataset to manipulate for residual regression
fert_volume_fit <- lm(volume ~ treatment, data = fert_volume_res)  # Fit the model
fert_volume_res$predicted <- predict(fert_volume_fit)   # Save the predicted values
fert_volume_res$residuals <- residuals(fert_volume_fit) # Save the residual values
head(select(fert_volume_res, treatment, volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment volume predicted residuals
##   <fct>      <dbl>     <dbl>     <dbl>
## 1 AMB       0.0514    0.0483   0.00306
## 2 AMB       0.0422    0.0483  -0.00614
## 3 AMB       0.0495    0.0483   0.00111
## 4 AMB       0.0473    0.0483  -0.00100
## 5 AMB       0.0526    0.0483   0.00425
## 6 AMB       0.0536    0.0483   0.00521
#Plot predicted (red) versus residuals (black)
fert_volume_res_scatter <- ggplot(fert_volume_res, aes(x = treatment, y = volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
fert_volume_res_scatter

#Histogram of residuals
fert_volume_res_hist <- fert_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
fert_volume_res_hist

#Histogram of residuals separated by treatment
fert_volume_res_t_hist <- fert_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
fert_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of treated eggs volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
fert_volume_nested_anova <- aov(volume ~ treatment/TANK_NUM, data = fert_volume)
summary(fert_volume_nested_anova)
##                     Df   Sum Sq   Mean Sq F value  Pr(>F)    
## treatment            2 0.000140 0.0000702    1.37   0.256    
## treatment:TANK_NUM   6 0.003343 0.0005571   10.88 1.4e-10 ***
## Residuals          216 0.011058 0.0000512                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment
fert_volume_anova <- aov(volume ~ treatment, data = fert_volume)
summary(fert_volume_anova)
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## treatment     2 0.00014 7.015e-05   1.082  0.341
## Residuals   222 0.01440 6.487e-05

Examine summary statistics for treated eggs

treatedegg <- select(fert_volume, time_point:treatment, volume)
treatedegg.amb <- treatedegg %>% filter(treatment=="AMB")
treatedegg.amb.stats <- treatedegg.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
amb <- as.factor(c("AMB"))
treatedegg.amb.stats$treatment <- amb

treatedegg.l <- treatedegg %>% filter(treatment=="l")
treatedegg.l.stats <- treatedegg.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
l <- as.factor(c("l"))
treatedegg.l.stats$treatment <- l

treatedegg.xl <- treatedegg %>% filter(treatment=="xl")
treatedegg.xl.stats <- treatedegg.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
xl <- as.factor(c("xl"))
treatedegg.xl.stats$treatment <- xl

treatedegg.stats.rownames <- as.factor(c("Treated_egg", "Treated_egg", "Treated_egg"))
treatedegg.stats <- as.data.frame(bind_rows(treatedegg.amb.stats, treatedegg.l.stats, treatedegg.xl.stats, .id=NULL))
treatedegg.stats$timepoint <-treatedegg.stats.rownames
treatedegg.stats <- treatedegg.stats[c(6,5,1,2,3,4)]
show(treatedegg.stats)
##     timepoint treatment       mean          sd        min        max
## 1 Treated_egg       AMB 0.04834626 0.008168517 0.02827965 0.07318527
## 2 Treated_egg         l 0.04762602 0.006896299 0.03435164 0.07040271
## 3 Treated_egg        xl 0.04954085 0.008961923 0.03259548 0.07502386
Final Plot
final_fert_volume <- fert_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.068, label = "a") +
  annotate("text", x = 2.2, y = 0.068, label = "a") +
  annotate("text", x = 3.2, y = 0.068, label = "a")
final_fert_volume

Gastrulation

Calculate Gastrula Volume
gas_volume <- filter(raw_data, time_point == "Gastrulation") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2))  #Use equation for volume of ellipse
head(gas_volume)
## # A tibble: 6 x 8
##   sample_ID time_point   treatment TANK_NUM metric_id       width1 width2 volume
##   <fct>     <fct>        <fct>     <fct>    <fct>            <dbl>  <dbl>  <dbl>
## 1 187       Gastrulation AMB       1        187_gastrulati…  0.614  0.456 0.0668
## 2 187       Gastrulation AMB       1        187_gastrulati…  0.571  0.492 0.0724
## 3 187       Gastrulation AMB       1        187_gastrulati…  0.581  0.489 0.0727
## 4 187       Gastrulation AMB       1        187_gastrulati…  0.572  0.382 0.0437
## 5 187       Gastrulation AMB       1        187_gastrulati…  0.701  0.37  0.0502
## 6 187       Gastrulation AMB       1        187_gastrulati…  0.551  0.511 0.0753
Examine the Data

Examine gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
gas_volume_t_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = treatment)) +
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_t_boxplot)

#Boxplot colored by TANK_NUM
gas_volume_s_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)

#QQ-Plot
qqnorm(gas_volume$volume)
qqline(gas_volume$volume)

Transform the Data

The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.

gas_volume_sqrt <- mutate(gas_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(gas_volume_sqrt)
## # A tibble: 6 x 9
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 187       Gastrulat… AMB       1        187_gast…  0.614  0.456 0.0668
## 2 187       Gastrulat… AMB       1        187_gast…  0.571  0.492 0.0724
## 3 187       Gastrulat… AMB       1        187_gast…  0.581  0.489 0.0727
## 4 187       Gastrulat… AMB       1        187_gast…  0.572  0.382 0.0437
## 5 187       Gastrulat… AMB       1        187_gast…  0.701  0.37  0.0502
## 6 187       Gastrulat… AMB       1        187_gast…  0.551  0.511 0.0753
## # … with 1 more variable: sqrt_volume <dbl>
Examine the Data Again

Examine sqrt-transformed gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
gas_sqrt_volume_t_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
   ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(d)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "none") #set title attributes
show(gas_sqrt_volume_t_boxplot)

#Boxplot colored by TANK_NUM
gas_sqrt_volume_s_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)

#QQ-Plot
qqnorm(gas_volume_sqrt$sqrt_volume)
qqline(gas_volume_sqrt$sqrt_volume)

Examine sqrt-transformed gastrulation volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
gas_sqrt_volume_res <- gas_volume_sqrt #Copy dataset to manipulate for residual regression
gas_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = gas_sqrt_volume_res)  # Fit the model
gas_sqrt_volume_res$predicted <- predict(gas_sqrt_volume_fit)   # Save the predicted values
gas_sqrt_volume_res$residuals <- residuals(gas_sqrt_volume_fit) # Save the residual values
head(select(gas_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment sqrt_volume predicted residuals
##   <fct>           <dbl>     <dbl>     <dbl>
## 1 AMB             0.259     0.252   0.00680
## 2 AMB             0.269     0.252   0.0173 
## 3 AMB             0.270     0.252   0.0180 
## 4 AMB             0.209     0.252  -0.0427 
## 5 AMB             0.224     0.252  -0.0276 
## 6 AMB             0.274     0.252   0.0227
#Plot predicted (red) versus residuals (black)
gas_sqrt_volume_res_scatter <- ggplot(gas_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
gas_sqrt_volume_res_scatter

#Histogram of residuals
gas_sqrt_volume_res_hist <- gas_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
gas_sqrt_volume_res_hist

#Histogram of residuals separated by treatment
gas_sqrt_volume_res_t_hist <- gas_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
gas_sqrt_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
gas_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = gas_volume_sqrt)
summary(gas_sqrt_volume_ts_anova)
##                     Df   Sum Sq   Mean Sq F value   Pr(>F)    
## treatment            2 0.002446 0.0012231   8.490 0.000307 ***
## treatment:TANK_NUM   4 0.000510 0.0001275   0.885 0.474327    
## Residuals          168 0.024204 0.0001441                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment.
gas_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = gas_volume_sqrt)
summary(gas_sqrt_volume_txs_anova)
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## treatment     2 0.01002 0.005012   8.535 0.000292 ***
## Residuals   172 0.10101 0.000587                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(gas_sqrt_volume_txs_anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt_volume ~ treatment, data = gas_volume_sqrt)
## 
## $treatment
##                diff          lwr          upr     p adj
## l-AMB  -0.017399962 -0.027859991 -0.006939933 0.0003564
## xl-AMB -0.012094266 -0.022554295 -0.001634237 0.0188790
## xl-l    0.005305696 -0.006152691  0.016764084 0.5186419

Examine summary statistics for gastrula

gas <- select(gas_volume, time_point:treatment, volume)

gas.amb <- gas %>% filter(treatment=="AMB")
gas.amb.stats <- gas.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.amb.stats$treatment <- amb

gas.l <- gas %>% filter(treatment=="l")
gas.l.stats <- gas.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.l.stats$treatment <- l

gas.xl <- gas %>% filter(treatment=="xl")
gas.xl.stats <- gas.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.xl.stats$treatment <- xl

gas.stats.rownames <- as.factor(c("Gastrulation", "Gastrulation", "Gastrulation"))
gas.stats <- as.data.frame(bind_rows(gas.amb.stats, gas.l.stats, gas.xl.stats, .id=NULL))
gas.stats$timepoint <-gas.stats.rownames
gas.stats <- gas.stats[c(6,5,1,2,3,4)]
show(gas.stats)
##      timepoint treatment       mean         sd        min        max
## 1 Gastrulation       AMB 0.06404722 0.01338058 0.03962596 0.11147668
## 2 Gastrulation         l 0.05556756 0.01218978 0.03560216 0.08503743
## 3 Gastrulation        xl 0.05781054 0.00924022 0.03422066 0.07561363
Final plot
final_gas_volume <- gas_sqrt_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.1, label = "a") +
  annotate("text", x = 2.2, y = 0.1, label = "b") +
  annotate("text", x = 3.2, y = 0.1, label = "b")
final_gas_volume

Planulae

Calculate planulae volume
planulae_volume <- filter(raw_data, time_point == "Planulae") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2))  #Use equation for volume of ellipse
head(planulae_volume)
## # A tibble: 6 x 8
##   sample_ID time_point treatment TANK_NUM metric_id      width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>           <dbl>  <dbl>  <dbl>
## 1 362       Planulae   AMB       10       362_planulae_1  0.851  0.441 0.0867
## 2 362       Planulae   AMB       10       362_planulae_2  0.817  0.444 0.0843
## 3 362       Planulae   AMB       10       362_planulae_3  0.684  0.388 0.0539
## 4 362       Planulae   AMB       10       362_planulae_4  0.796  0.515 0.111 
## 5 362       Planulae   AMB       10       362_planulae_5  0.75   0.452 0.0802
## 6 362       Planulae   AMB       10       362_planulae_6  0.892  0.405 0.0766
Examine the Data

Examine planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
planulae_volume_t_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = treatment)) +
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_t_boxplot)

#Boxplot colored by TANK_NUM
planulae_volume_s_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)

#QQ-Plot
qqnorm(planulae_volume$volume)
qqline(planulae_volume$volume)

Transform the Data

The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.

planulae_volume_sqrt <- mutate(planulae_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(planulae_volume_sqrt)
## # A tibble: 6 x 9
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 362       Planulae   AMB       10       362_plan…  0.851  0.441 0.0867
## 2 362       Planulae   AMB       10       362_plan…  0.817  0.444 0.0843
## 3 362       Planulae   AMB       10       362_plan…  0.684  0.388 0.0539
## 4 362       Planulae   AMB       10       362_plan…  0.796  0.515 0.111 
## 5 362       Planulae   AMB       10       362_plan…  0.75   0.452 0.0802
## 6 362       Planulae   AMB       10       362_plan…  0.892  0.405 0.0766
## # … with 1 more variable: sqrt_volume <dbl>
Examine the Data Again

Examine sqrt-transformed planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
planulae_sqrt_volume_t_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(e)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0), 
        legend.position = ("none")) #set title attributes
planulae_sqrt_volume_t_boxplot

#Boxplot colored by TANK_NUM
planulae_sqrt_volume_s_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)

#QQ-Plot
qqnorm(planulae_volume_sqrt$sqrt_volume)
qqline(planulae_volume_sqrt$sqrt_volume)

Examine sqrt-transformed planulae volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
planulae_sqrt_volume_res <- planulae_volume_sqrt #Copy dataset to manipulate for residual regression
planulae_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = planulae_sqrt_volume_res)  # Fit the model
planulae_sqrt_volume_res$predicted <- predict(planulae_sqrt_volume_fit)   # Save the predicted values
planulae_sqrt_volume_res$residuals <- residuals(planulae_sqrt_volume_fit) # Save the residual values
head(select(planulae_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment sqrt_volume predicted residuals
##   <fct>           <dbl>     <dbl>     <dbl>
## 1 AMB             0.294     0.242   0.0526 
## 2 AMB             0.290     0.242   0.0486 
## 3 AMB             0.232     0.242  -0.00960
## 4 AMB             0.332     0.242   0.0907 
## 5 AMB             0.283     0.242   0.0415 
## 6 AMB             0.277     0.242   0.0350
#Plot predicted (red) versus residuals (black)
planulae_sqrt_volume_res_scatter <- ggplot(planulae_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
planulae_sqrt_volume_res_scatter

#Histogram of residuals
planulae_sqrt_volume_res_hist <- planulae_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
planulae_sqrt_volume_res_hist

#Histogram of residuals separated by treatment
planulae_sqrt_volume_res_t_hist <- planulae_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
planulae_sqrt_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against treatment. 1-Way ANOVA of volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
planulae_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_ts_anova)
##                     Df  Sum Sq   Mean Sq F value   Pr(>F)    
## treatment            2 0.00506 0.0025286   10.30 6.57e-05 ***
## treatment:TANK_NUM   3 0.00144 0.0004809    1.96    0.123    
## Residuals          144 0.03534 0.0002454                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment
planulae_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_txs_anova)
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## treatment     2 0.02277 0.011387   9.716 0.000109 ***
## Residuals   147 0.17227 0.001172                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(planulae_sqrt_volume_txs_anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt_volume ~ treatment, data = planulae_volume_sqrt)
## 
## $treatment
##                diff          lwr          upr     p adj
## l-AMB  -0.029414826 -0.045625704 -0.013203947 0.0000928
## xl-AMB -0.020562553 -0.036773432 -0.004351675 0.0087664
## xl-l    0.008852273 -0.007358606  0.025063151 0.4014180

Examine summary statistics for planula

plan <- select(planulae_volume, time_point:treatment, volume)

plan.amb <- plan %>% filter(treatment=="AMB")
plan.amb.stats <- plan.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.amb.stats$treatment <- amb

plan.l <- plan %>% filter(treatment=="l")
plan.l.stats <- plan.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.l.stats$treatment <- l

plan.xl <- plan %>% filter(treatment=="xl")
plan.xl.stats <- plan.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.xl.stats$treatment <- xl

plan.stats.rownames <- as.factor(c("Planulae", "Planulae", "Planulae"))
plan.stats <- as.data.frame(bind_rows(plan.amb.stats, plan.l.stats, plan.xl.stats, .id=NULL))
plan.stats$timepoint <-plan.stats.rownames
plan.stats <- plan.stats[c(6,5,1,2,3,4)]
show(plan.stats)
##   timepoint treatment       mean         sd        min        max
## 1  Planulae       AMB 0.05984008 0.01878055 0.02089216 0.11329448
## 2  Planulae         l 0.04598178 0.01271031 0.02244043 0.07760985
## 3  Planulae        xl 0.05013986 0.01537470 0.01709592 0.08994879
Final table
volume.stats <- as.data.frame(bind_rows(untreatedegg.stats, treatedegg.stats, gas.stats, plan.stats, .id=NULL))
volume.stats <- volume.stats[c(6,1,2,3,4,5)]
show(volume.stats)
##           max      timepoint treatment       mean          sd         min
## 1  0.08299064 Untreated_Eggs        NA 0.03958601 0.023022591 0.001504261
## 2  0.07318527    Treated_egg       AMB 0.04834626 0.008168517 0.028279649
## 3  0.07040271    Treated_egg         l 0.04762602 0.006896299 0.034351644
## 4  0.07502386    Treated_egg        xl 0.04954085 0.008961923 0.032595475
## 5  0.11147668   Gastrulation       AMB 0.06404722 0.013380581 0.039625961
## 6  0.08503743   Gastrulation         l 0.05556756 0.012189782 0.035602165
## 7  0.07561363   Gastrulation        xl 0.05781054 0.009240220 0.034220665
## 8  0.11329448       Planulae       AMB 0.05984008 0.018780552 0.020892161
## 9  0.07760985       Planulae         l 0.04598178 0.012710312 0.022440434
## 10 0.08994879       Planulae        xl 0.05013986 0.015374697 0.017095919
Final plot
final_plan_volume <- planulae_sqrt_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.105, label = "a") +
  annotate("text", x = 2.2, y = 0.105, label = "b") +
  annotate("text", x = 3.2, y = 0.105, label = "b")
final_plan_volume

fig_2 <- grid.arrange(final_cell_cleavage, arrangeGrob(final_egg_volume, final_fert_volume, final_gas_volume, final_plan_volume, ncol = 2, nrow = 2), ncol = 2, nrow = 1, widths = c(3,7), clip = "off")

ggsave("5-Developmental_progression_and_size/Output/fig2_embryo_development.pdf", fig_2, width = 16, height = 8, units = c("in"))